{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "from torch.nn import init\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sequential(\n",
      "  (0): Linear(in_features=4, out_features=3, bias=True)\n",
      "  (1): ReLU()\n",
      "  (2): Linear(in_features=3, out_features=1, bias=True)\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "# pytorch会默认初始化\n",
    "net = nn.Sequential(\n",
    "    nn.Linear(4,3), \n",
    "    nn.ReLU(),\n",
    "    nn.Linear(3,1)\n",
    ")\n",
    "\n",
    "print(net)\n",
    "x = torch.rand(2, 4)\n",
    "y = net(x).sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 访问模型参数\n",
    "对于Sequential实例中含模型参数的层，可以通过Module类的parameters()或者named_parameters方法来访问所有参数（以迭代器的形式返回），后者除了返回参数Tensor外还会返回其名字"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'generator'>\n",
      "<class 'generator'>\n",
      "0.weight torch.Size([3, 4])\n",
      "0.bias torch.Size([3])\n",
      "2.weight torch.Size([1, 3])\n",
      "2.bias torch.Size([1])\n"
     ]
    }
   ],
   "source": [
    "print(type(net.parameters()))\n",
    "print(type(net.named_parameters()))\n",
    "for name, param in net.named_parameters():\n",
    "    print(name, param.size())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "返回的名字自动加上了层数的索引作为前缀。对于使用Sequential类构造的神经网络，可以通过方括号[ ]来访问网络的任一层。这样就不会有层数索引的前缀"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "weight torch.Size([3, 4]) <class 'torch.nn.parameter.Parameter'>\n",
      "bias torch.Size([3]) <class 'torch.nn.parameter.Parameter'>\n"
     ]
    }
   ],
   "source": [
    "for name, param in net[0].named_parameters():\n",
    "    print(name, param.size(), type(param))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "返回的param的类型为torch.nn.parameter.Parameter，其实这是Tensor的子类，和Tensor不同的是如果一个Tensor是Parameter，那么它会自动被添加到模型的参数列表里"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "weight1\n"
     ]
    }
   ],
   "source": [
    "class MyModule(nn.Module):\n",
    "    def __init__(self, **kwargs):\n",
    "        super(MyModule, self).__init__(**kwargs)\n",
    "        self.weight1 = nn.Parameter(torch.rand(20, 20))\n",
    "        self.weight2 = torch.rand(20, 20)\n",
    "        \n",
    "    def forward(self, x):\n",
    "        pass\n",
    "    \n",
    "n = MyModule()\n",
    "for name, param in n.named_parameters():\n",
    "    print(name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "上面的代码中weight1在参数列表中但是weight2却没在参数列表中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[ 0.3639,  0.3870, -0.0327, -0.2829],\n",
      "        [-0.3787,  0.4818,  0.0030,  0.2823],\n",
      "        [ 0.0085,  0.0526,  0.3155,  0.0960]])\n",
      "None\n",
      "tensor([[-0.0920, -0.0931, -0.0607, -0.0552],\n",
      "        [-0.2014, -0.2038, -0.1330, -0.1208],\n",
      "        [ 0.0000,  0.0000,  0.0000,  0.0000]])\n"
     ]
    }
   ],
   "source": [
    "# 因为Parameter是Tensor，即Tensor拥有的属性它都有，\n",
    "# 比如可以根据data来访问参数数值，用grad来访问参数梯度\n",
    "\n",
    "weight_0 = list(net[0].parameters())[0]\n",
    "print(weight_0.data)\n",
    "print(weight_0.grad) # 反向传播前梯度为None\n",
    "y.backward()\n",
    "print(weight_0.grad)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 初始化模型参数\n",
    "PyTorch中nn.Module的模块参数都采取了较为合理的初始化策略（不同类型的layer具体采样的哪一种初始化方法的可参考源代码）。也可以使用其他方法来初始化权重，PyTorch的init模块里提供了多种预设的初始化方法。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.weight tensor([[ 0.0010,  0.0125,  0.0181,  0.0046],\n",
      "        [-0.0102, -0.0020,  0.0114, -0.0025],\n",
      "        [-0.0110,  0.0131, -0.0039,  0.0007]])\n",
      "2.weight tensor([[0.0127, 0.0053, 0.0057]])\n"
     ]
    }
   ],
   "source": [
    "for name, param in net.named_parameters():\n",
    "    if 'weight' in name:\n",
    "        init.normal_(param, mean=0, std=0.01)\n",
    "        print(name, param.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.bias tensor([0., 0., 0.])\n",
      "2.bias tensor([0.])\n"
     ]
    }
   ],
   "source": [
    "for name, param in net.named_parameters():\n",
    "    if 'bias' in name:\n",
    "        init.constant_(param, val=0)\n",
    "        print(name, param.data)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 自定义初始化方法\n",
    "有时候我们需要的初始化方法并没有在init模块中提供。这时，可以实现一个初始化方法，从而能够像使用其他初始化方法那样使用它。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.weight tensor([[ 0.0000, -0.0000,  8.1338, -0.0000],\n",
      "        [-7.7919,  6.5861, -0.0000,  8.9023],\n",
      "        [-0.0000, -0.0000,  0.0000, -5.4401]])\n",
      "2.weight tensor([[ 0.0000, -7.1177, -8.7854]])\n"
     ]
    }
   ],
   "source": [
    "# 实现一个自定义的初始化方法。\n",
    "# 权重有一半概率初始化为0，有另一半概率初始化为[−10,−5]和[5,10]两个区间里均匀分布的随机数。\n",
    "\n",
    "def init_weight_(tensor):\n",
    "    with torch.no_grad():\n",
    "        tensor.uniform_(-10, 10)\n",
    "        tensor *= (tensor.abs() >= 5).float()\n",
    "\n",
    "for name, param in net.named_parameters():\n",
    "    if 'weight' in name:\n",
    "        init_weight_(param)\n",
    "        print(name, param.data)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.bias tensor([1., 1., 1.])\n",
      "2.bias tensor([1.])\n"
     ]
    }
   ],
   "source": [
    "# 还可以通过改变这些参数的data来改写模型参数值同时不会影响梯度\n",
    "for name, param in net.named_parameters():\n",
    "    if 'bias' in name:\n",
    "        param.data += 1\n",
    "        print(name, param.data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 共享模型参数\n",
    "在有些情况下，我们希望在多个层之间共享模型参数。如何共享模型参数: Module类的forward函数里多次调用同一个层。此外，如果我们传入Sequential的模块是同一个Module实例的话参数也是共享的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sequential(\n",
      "  (0): Linear(in_features=1, out_features=1, bias=False)\n",
      "  (1): Linear(in_features=1, out_features=1, bias=False)\n",
      ")\n",
      "0.weight tensor([[3.]])\n"
     ]
    }
   ],
   "source": [
    "linear = nn.Linear(1, 1, bias=False)\n",
    "net = nn.Sequential(linear, linear)\n",
    "print(net)\n",
    "for name, param in net.named_parameters():\n",
    "    init.constant_(param, val=3)\n",
    "    print(name, param.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "# 在内存中，这两个线性层其实一个对象:\n",
    "print(id(net[0]) == id(net[1]))\n",
    "print(id(net[0].weight) == id(net[1].weight))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(9., grad_fn=<SumBackward0>)\n",
      "tensor([[6.]])\n"
     ]
    }
   ],
   "source": [
    "# 因为模型参数里包含了梯度，\n",
    "# 所以在反向传播计算时，这些共享的参数的梯度是累加的\n",
    "\n",
    "x = torch.ones(1, 1)\n",
    "y = net(x).sum()\n",
    "print(y)\n",
    "y.backward()\n",
    "print(net[0].weight.grad)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "yczlab_3.6",
   "language": "python",
   "name": "yczlab_python3.6"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
